****************************************************************************
* Session setup for the demographics preparation step.
*
* Resets Stata, runs the directory-definition do-file, and opens the log
* 2h_Regressions_Demographics_Prep.log in $dir_log.
*
* NOTE(review): an earlier version of this header said the file calculates
* the MTRs of individuals. The log name points to a demographics
* preparation step instead -- confirm which description is current.
* $dir_log is presumably defined by 0_Set_Directories.do -- confirm.
****************************************************************************

* Start from a clean session; close any log left open by a previous run.
set more off
clear all
capture log close

* Define the directory globals used below.
do "H:\Lavecchia_7086\to-transfer-jan-2022\RESTAT_Replication_Programs\0_Set_Directories.do"

* capture: a failure to open the log does not stop the do-file.
capture log using "$dir_log\2h_Regressions_Demographics_Prep.log", replace


****************************************************************************
***** A: Build the demographics-augmented analysis files **********
****************************************************************************
* For each period file (8299 and 0016) this section:
*   1. loads the capital income file and keeps the analysis variables,
*   2. attaches business-owner, year-varying and permanent demographics,
*   3. applies the self-employment and age sample restrictions,
*   4. builds age polynomials and numeric codes for categorical controls,
*   5. saves data_<years>_with_demographics.dta in $dir_data.
* NOTE(review): this section was previously titled "Calculate MTR actual",
* but nothing in it computes a marginal tax rate -- confirm the title.
clear
foreach years in 8299 0016 {

    * Explicit clear: the load must not depend on the previous iteration
    * having just saved the data in memory.
    use "$dir_data\capital_income_final_`years'.dta", clear

    *keep lin__i year tirc_i clkgxi  wgt2_i cpi_to2016
    keep lin__i year tirc_i clkgxi wgt2_i cpi_to2016 ggex_i

    * Merge with business owners; observations found only in the using
    * file are discarded.
    sort lin__i year
    merge 1:1 lin__i year using "$dir_data\business_owners.dta", keep(master match) nogenerate

    * Merge with the year-varying demographics for the same period; again
    * using-only observations are discarded.
    sort lin__i year
    merge 1:1 lin__i year using "$dir_data\demographic_`years'.dta", keep(master match) nogenerate
    *merge 1:1 lin__i year using  "H:\Lavecchia_7086\to-transfer-jan-2022\2_Alisa_Data\Data_files-core\demographic_`years'.dta"
    drop cd_i tnk19i

    * Drop observations flagged as self-employed last year or this year but
    * not flagged next year, when clkgxi is positive.
    * Stata treats missing as larger than any number, so a missing clkgxi
    * satisfies clkgxi>0 and a missing se_income_nextyear satisfies !=1.
    drop if (se_income_lastyear==1 | se_income_thisyear==1) & se_income_nextyear !=1 & clkgxi>0
    drop se_income_lastyear se_income_thisyear se_income_nextyear

    * Remove the year-varying age and sex variables before attaching the
    * permanent file (merge keeps the master copy of any variable present
    * in both files; presumably the permanent sxco_i is wanted -- confirm).
    sort lin__i
    drop age__ sxco_i

    * One permanent record per individual is attached to every year of that
    * individual. m:1 replaces the deprecated old-style merge and stops with
    * an error if demographic_permanent.dta is not unique on lin__i.
    merge m:1 lin__i using "$dir_data\demographic_permanent.dta", keep(master match) nogenerate

    * Drops a handful of cases (e.g. 85 individual-year observations out of
    * about 69.25 million in 1982-1999). A missing yob__i also satisfies
    * this condition and is dropped.
    drop if yob__i > year

    * Age in 1982, current age, and powers of current age.
    gen age82 = 1982 - yob__i
    gen age = year - yob__i
    forvalues p = 2/4 {
        gen age`p' = age^`p'
    }

    * New sample restriction (February 27, 2020), based on age in 1990.
    * NOTE(review): the original comment states the intended window is
    * 20 to 60 (to get a balanced panel), but the code keeps ages 20 to 90.
    * The upper bound is left at 90 here because changing it alters the
    * estimation sample -- confirm which bound is intended.
    gen age90 = 1990 - yob__i
    keep if inrange(age90, 20, 90)

    * Postal code controls: 2-character prefix (Adam edit - February 2, 2022)
    * and 3-character prefix, each converted to a numeric group id; sex code
    * is converted the same way.
    gen psco_2_digit = substr(psco_i, 1, 2)
    replace psco_i = substr(psco_i, 1, 3)
    egen num_psco_i = group(psco_i)
    egen num_psco_2_digit = group(psco_2_digit)
    egen num_sxco_i = group(sxco_i)

    * Indicator equal to 0 when iemcop is empty or "@", and 1 otherwise.
    * note iemcop =@ for most individuals in 1989
    gen num_iemcop = !inlist(iemcop, "", "@")
    drop psco_i psco_2_digit sxco_i iemcop yob__i

    * Shift the family composition code up by 10.
    * NOTE(review): presumably so the codes are non-negative for use as
    * factor variables -- confirm.
    replace fcmp_i = fcmp_i + 10

    save "$dir_data\data_`years'_with_demographics.dta", replace

}


* available controls:
* taxable and gross income
* province
* age, business ownership status
* family type, family size, postal code, sex, immigration code

